/*==================================================================
Project: 	INCREASING THE DEMAND FOR WORKERS WITH A CRIMINAL RECORD
Info: 		Creates main figures for paper.
Program: 	1_mainfigures.do
Created: 	January 4, 2022
Edited:  	July 12, 2022
Purpose: 	Creates main figures for paper on workers with a criminal conviction at the Platform.
Note: 		Set the working directory to the Replication folder that contains this do file.
==================================================================*/
* ---------------------------------------------------------------------------
* Session setup: reset Stata, define project paths, install the user-written
* packages this file depends on, and configure the graph scheme.
* ---------------------------------------------------------------------------
cap log close
clear all
version 16, permanently
discard

***** Set paths.
* The project root is the current working directory (see the header note).
global Main "`c(pwd)'"
cd "$Main"

* Data directory
global Data "$Main/analysis_data"

* Figures directory. Create it if it is missing so that graph export does not
* fail in a fresh checkout; capture swallows the "already exists" error.
global Figures "$Main/output_figures"
capture mkdir "$Figures"

* Install packages (requires network access; replace forces the latest version)
foreach pkg in ftools ivreg2 reghdfe grstyle palettes colrspace gtools blindschemes ipfweight ranktest sencode {
    ssc install `pkg', replace
}
net install allston, from("https://raw.githubusercontent.com/dballaelliott/allston/master/") replace

* Set graph style
graph set window fontface "Times New Roman"
set scheme s2mono
grstyle init platform, replace 
grstyle clockdir subtitle_position 11
grstyle gridringstyle subtitle_ring 0
grstyle set legend 7, inside nobox
* Use the same (medium) size for every text element
foreach element in text text_option filled_text key_label label {
    grstyle gsize `element' medium
}
* Turn off grid lines
grstyle yesno draw_major_hgrid       no
grstyle yesno draw_major_vgrid       no
gr_setscheme, scheme(platform)
graph set eps fontface "Times New Roman"

/*==================================================================
*** RUN A LOG FILE ***
==================================================================*/
* Open (or append to) a dated log file under $Main/log.
global date = "$S_DATE"
global LogPath = "$Main/log"
capture log close
* log using fails if the target directory does not exist; create it when
* missing (capture ignores the error raised when it is already there).
capture mkdir "$LogPath"
log using "$LogPath/WCC_$date.log", append

/*==================================================================
*** LOAD PROGRAMS ***
==================================================================*/

/***
Program: residualize
Purpose: Calculate residuals
***/
program define residualize
/****************************************************************************** 
Residualizes a variable with reghdfe and adds back its estimation-sample mean.

required: varname varlist
first variable = variable to residualize
second variable (or varlist) = variable that we are residualizing on (i.e. pulling out)
also required: 
either replace or [gen]erate
generate puts the residualized version in a new variable
replace replaces varname with the residualized version
optional:
absorb() is passed through to reghdfe; when omitted, noabsorb is used

If both replace and generate() are given, replace takes precedence.
Exits with error 198 when neither replace nor generate() is supplied.
******************************************************************************/
syntax varlist(min=1 ts fv), [replace] [GENerate(name) absorb(passthru)]

* Validate the required option up front, before any estimation is run, and
* signal the failure to the caller with a proper return code.
if missing("`replace'") & missing("`generate'") {
    di as error "Please specify " as input "replace" as error " or " as input "generate({it:varname})" 
    exit 198
}

if missing("`absorb'") local absorb noabsorb 

local y: word 1 of `varlist' 

reghdfe `varlist', `absorb' resid 

* Mean of the outcome over the estimation sample, held in a scalar so that no
* precision is lost through macro expansion.
tempname mu
su `y' if e(sample), meanonly 
scalar `mu' = r(mean)

tempvar residual 
predict `residual', residual 

* Residual + mean keeps the variable on its original scale
if !missing("`replace'") replace `y' = `residual' + `mu' 
else gen `generate' = `residual' + `mu' 

end 

/***
Program: store_estimates
Purpose: Stores beta, se, and p-value estimates.
***/
program define store_estimates, rclass
/******************************************************************************
Formats a point estimate, its standard error, and a two-sided normal-based
p-value for display.

argument:
  var = coefficient name as accepted by _b[] / _se[] (e.g. 1.post), or the
        literal word lincom to read r(estimate) and r(se) left by -lincom-

returns (all strings):
  r(beta)   = estimate formatted %10.3fc
  r(se)     = standard error formatted %10.3fc, wrapped in parentheses
  r(pvalue) = p-value formatted %10.3fc
When both the estimate and the standard error display as zero, r(beta) is a
LaTeX dash cell and r(se) is empty.
******************************************************************************/

args var

* Read the raw numbers first. For lincom this must happen before any r-class
* command overwrites r().
tempname est stderr pval
if "`var'" == "lincom" {
    scalar `est'    = r(estimate)
    scalar `stderr' = r(se)
}
else {
    scalar `est'    = _b[`var']
    scalar `stderr' = _se[`var']
}

* Display strings
local b  = string(`est', "%10.3fc") 
local se = "(" + string(`stderr', "%10.3fc") + ")"

* Compute the p-value from the unrounded values. Using the display strings
* loses precision and breaks once %10.3fc inserts a thousands comma.
scalar `pval' = 2*normal(-abs(`est'/`stderr'))

* Significance stars (currently not returned; see the commented line below)
local star ""
if      `pval' <= .01 local star "^{***}"
else if `pval' <= .05 local star "^{**}"
else if `pval' <= .10 local star "^{*}"

local p_val = string(`pval', "%10.3fc")

* Estimate and standard error both display as zero: show a dash instead
if inlist("`b'", "0.000", "-0.000") & "`se'" == "(0.000)" { 
    local b "\multicolumn{1}{c}{--}" 
    local se "" 
}

return local beta = "`b'"
return local se = "`se'"
/* if !missing("`star'") return local star = "$`star'$" */
return local pvalue= "`p_val'"

end

/*==================================================================
*** MAKE FIGURES ***
==================================================================*/

/*===========================================================================
Figure 1: Wage Subsidies Bar Graph
=============================================================================*/

use "$Data/main_survey_wide.dta", clear

* Express the outcome in percent (0-100)
replace hire_sub = 100 * hire_sub 

* R holds one row per bar: [estimate, CI lower, CI upper, bar index]
local row = 1
mat R = J(5,4,.)

* Full sample: one coefficient per subsidy rate (no constant, no base level)
reg hire_sub ibn.subsidy_rate, nocons cluster(firm_id)
mat li r(table)
di 
foreach X in 0bn.subsidy_rate 10.subsidy_rate 25.subsidy_rate 50.subsidy_rate 100.subsidy_rate {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    * Rounded estimate printed on the bar
    local coef_`row' = string(_b[`X'], "%8.0f")
    local ++row
}

* p-values testing whether each estimate differs from the baseline (0% subsidy)
local row=2
reg hire_sub i.subsidy_rate, cluster(firm_id)
foreach X in 10.subsidy_rate 25.subsidy_rate 50.subsidy_rate 100.subsidy_rate {
    local t = _b[`X']/_se[`X']
    local p = 2*ttail(e(df_r),abs(`t'))
    * Drop the leading zero only (".05"); a p-value that rounds to 1.00 stays "1.00"
    local p_value_`row' = regexr(string(`p', "%8.2f"), "^0", "")
    local ++row
}

* Turn the results matrix into the plotting dataset
clear
svmat R
la var R1 "Willingness to Work with WCs (%)"
label define R4 1 "Baseline" 2 "10%" 3 "25%" 4 "50%" 5 "100%"
label values R4 R4

* Add x axis spacing between groups (bars sit at 1, 2.5, 4, 5.5, 7)
generate x_value = R4 if R4 == 1
replace x_value = R4 + 0.5 if R4 == 2
replace x_value = R4 + 1 if R4 == 3
replace x_value = R4 + 1.5 if R4 == 4
replace x_value = R4 + 2 if R4 == 5
sort x_value
list x_value R4 , sepby(R4)

* Generate graph
twoway (bar R1 x_value, barw(1) fi(inten10) lc(black) lw(medium) ) ///
(rcap R3 R2 x_value, lc(gs5)), ///
legend(off) xlabel(1.0 "Baseline" 2.5 `""10%" "Subsidy""' 4 `""25%" "Subsidy""' 5.5 `""50%" "Subsidy""' 7 `""100%" "Subsidy""') ///
ylabel(0(020)100, nogrid angle(0)) yscale(range(1.0))  ///
text(20 1 "`coef_1'%") text(20 2.5 "`coef_2'%") text(20 4 "`coef_3'%")  text(20 5.5 "`coef_4'%") text(20 7 "`coef_5'%") ///
text(95 2.5 "{it:p}-value=`p_value_2'") text(95 4 "{it:p}-value=`p_value_3'") text(95 5.5 "{it:p}-value=`p_value_4'") text(95 7 "{it:p}-value=`p_value_5'") ///
subtitle("",  margin(zero) size(medium) pos(11)) ///
graphregion(color(white)) bgcolor(white)   ytitle("Willing to Work with WCs (%)") xtitle("") xscale(titlegap(*10)) yscale(titlegap(*5)) 

graph display, ysize(5) xsize(7)	
graph export "$Figures/f1_baseline_wagesubsidies.pdf", replace  	
graph export "$Figures/f1_baseline_wagesubsidies.eps", replace  


/*===========================================================================
Figure 2: Heterogeneity by Labor Market Conditions and Job Characteristics 
=============================================================================*/

/*---------------  Panel A: No Subsidy Sample --------------*/
use "$Data/main_survey_wide.dta", clear

* Express outcomes in percent (0-100)
foreach var in hire_sub hire_sub_alt { 
    replace `var' = 100*`var'
} 

* Complement indicators. A missing high_val / cust_int yields 0 here, so the
* regressions below restrict the sample to non-missing values.
gen low_val = high_val == 0
gen no_cust_int = cust_int == 0

* R holds one row per bar: [estimate, CI lower, CI upper, bar index, flag]
* (column 5 is only filled for the customer-interaction rows)
local row=1
mat R=J(6,5,.)

* Stack the two hiring indicators: "Yes" only (Base) and "Yes" or
* "If hard to fill my job" (HardToFill), giving two observations per manager
preserve
use "$Data/main_survey_wide.dta", clear
keep hire_sub hire_sub_alt subsidy_rate mgr_id firm_id
replace hire_sub = 100*hire_sub
replace hire_sub_alt = 100*hire_sub_alt
rename hire_sub hireBase
rename hire_sub_alt hireHardToFill 
reshape long hire, i(mgr_id) j(demand_type) string 
gen hard_to_fill = demand_type == "HardToFill"

* Mean of each indicator in the no-subsidy sample
reg hire ibn.hard_to_fill if subsidy_rate==0, cluster(firm_id) nocons
foreach X in 0bn.hard_to_fill 1.hard_to_fill {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'	
    local coef_`row' = string(_b[`X'], "%8.0f")
    local ++row
}

* p-value for "If hard to fill my job" relative to baseline
reg hire hard_to_fill if subsidy_rate==0, cluster(firm_id)
local t = _b[hard_to_fill]/_se[hard_to_fill]
local p = 2*ttail(e(df_r),abs(`t'))
* Drop the leading zero only; a p-value that rounds to 1.00 stays "1.00"
local p_value_1 = regexr(string(`p', "%8.2f"), "^0", "")
restore 

* High value inventory and low value inventory estimates
reg hire_sub high_val low_val if !missing(high_val) & subsidy_rate==0, nocons cluster(firm_id) 
foreach X in high_val low_val {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    local coef_`row' = string(_b[`X'], "%8.0f")
    local ++row
}

* p-value for high value inventory estimate relative to low value
* (the regression does not alter the data, so no preserve/restore is needed)
reg hire_sub high_val if !missing(high_val) & subsidy_rate==0, cluster(firm_id) 
local t = _b[high_val]/_se[high_val]
local p = 2*ttail(e(df_r),abs(`t'))
local p_value_2 = regexr(string(`p', "%8.2f"), "^0", "")

* Customer interaction vs. no customer interaction estimates
reg hire_sub cust_int no_cust_int if !missing(cust_int) & subsidy_rate==0, nocons cluster(firm_id)
mat li r(table)
di 
foreach X in cust_int no_cust_int {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_`row' = string(_b[`X'], "%8.0f")
    local ++row
}

* p-value for customer interaction estimate relative to no customer interaction
reg hire_sub cust_int if !missing(cust_int) & subsidy_rate==0, cluster(firm_id) 
local t = _b[cust_int]/_se[cust_int]
local p = 2*ttail(e(df_r),abs(`t'))
local p_value_3 = regexr(string(`p', "%8.2f"), "^0", "")

* Turn the results matrix into the plotting dataset
clear
svmat R
la var R1 "Willingness to Work with WCs (%)"

* Define group based on order of regressions and predictions above
gen Group = .
replace Group = 1 if R4 == 1 //baseline
replace Group = 1 if R4 == 2 //if hard to fill my job
replace Group = 2 if R4 == 3 | R4 == 4 //inventory value
replace Group = 3 if R4 == 5 | R4 == 6 //customer interaction

* Labels match the three group codes assigned above
label define Group 1 "Baseline / Hard to Fill" 2 "Inventory Value" 3 "Customer Interaction"
label values Group Group

* Add x axis spacing between groups (bars sit at 1, 2, 3.5, 4.5, 6, 7)
generate x_value = R4 if Group == 1
replace x_value = R4 + 0.5 if Group == 2
replace x_value = R4 + 1 if Group == 3
sort x_value 
list x_value Group, sepby(Group)

* Generate graph (graph-wide options placed after the plots, as in Figure 1)
twoway (bar R1 x_value, barw(1) fi(inten10) lc(black) lw(medium) ) ///
(rcap R3 R2 x_value, lc(gs5)), ///
legend(off) xlabel(1 "Baseline" 2 `""If Hard to" "Fill My Jobs""' 3.5 `""High Value" "Inventory""' 4.5 `""Low Value" "Inventory""' 6 `""Customer" "Interaction""' 7 `""No Customer" "Interaction""') ///
ylabel(0(020)100, nogrid angle(0)) yscale(range(1.0))  ///
text(20 1 "`coef_1'%") text(20 2 "`coef_2'%") text(20 3.5 "`coef_3'%")  text(20 4.5 "`coef_4'%") text(20 6 "`coef_5'%") text(20 7 "`coef_6'%") ///
xline(2.75, lpattern(-) lcolor(black)) ///
xline(5.25, lpattern(-) lcolor(black)) ///
text(95 1.5 "{it:p}-value=`p_value_1'") text(95 4 "{it:p}-value=`p_value_2'") text(95 6.5 "{it:p}-value=`p_value_3'") ///
subtitle("",  margin(zero) size(medium) pos(11)) ///
graphregion(color(white)) bgcolor(white) ytitle("Willing to Work with WCs (%)") xtitle("") xscale(titlegap(*10)) yscale(titlegap(*5)) 

graph display, ysize(4) xsize(6)	
graph export "$Figures/f2a_labormarket_jobchar_nosub.pdf", replace 
graph export "$Figures/f2a_labormarket_jobchar_nosub.eps", replace 

/*------------------  Panel B: Full Sample ------------------*/
use "$Data/main_survey_wide.dta", clear

* Express outcomes in percent (0-100)
foreach var in hire_sub hire_sub_alt { 
    replace `var'= 100*`var'
} 

* Complement indicators. A missing high_val / cust_int yields 0 here, so the
* regressions below restrict the sample to non-missing values.
gen low_val = high_val == 0
gen no_cust_int = cust_int == 0

* R holds one row per bar: [estimate, CI lower, CI upper, bar index, flag]
* (column 5 is only filled for the customer-interaction rows)
local row=1
mat R=J(6,5,.)

* Stack the two hiring indicators: "Yes" only (Base) and "Yes" or
* "If hard to fill my job" (HardToFill), giving two observations per manager
preserve
use "$Data/main_survey_wide.dta", clear
keep hire_sub hire_sub_alt subsidy_rate mgr_id firm_id
replace hire_sub = 100*hire_sub
replace hire_sub_alt = 100*hire_sub_alt
rename hire_sub hireBase
rename hire_sub_alt hireHardToFill 
reshape long hire, i(mgr_id) j(demand_type) string 
gen hard_to_fill = demand_type == "HardToFill"

* Estimates for each indicator, controlling for subsidy rate (full sample)
reg hire ibn.hard_to_fill i.subsidy_rate, cluster(firm_id) nocons
foreach X in 0bn.hard_to_fill 1.hard_to_fill {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'	
    local coef_`row' = string(_b[`X'], "%8.0f")
    local ++row
}

* p-value for "If hard to fill my job" relative to baseline
reg hire hard_to_fill i.subsidy_rate, cluster(firm_id)
local t = _b[hard_to_fill]/_se[hard_to_fill]
local p = 2*ttail(e(df_r),abs(`t'))
* Drop the leading zero only; a p-value that rounds to 1.00 stays "1.00"
local p_value_1 = regexr(string(`p', "%8.2f"), "^0", "")
restore

* High value inventory and low value inventory estimates
reg hire_sub high_val low_val i.subsidy_rate if !missing(high_val), nocons cluster(firm_id) 
foreach X in high_val low_val {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    local coef_`row' = string(_b[`X'], "%8.0f")
    local ++row
}

* p-value for high value inventory estimate relative to low value
* (the regression does not alter the data, so no preserve/restore is needed)
reg hire_sub high_val i.subsidy_rate if !missing(high_val), cluster(firm_id) 
local t = _b[high_val]/_se[high_val]
local p = 2*ttail(e(df_r),abs(`t'))
local p_value_2 = regexr(string(`p', "%8.2f"), "^0", "")

* Customer interaction vs. no customer interaction estimates
reg hire_sub cust_int no_cust_int i.subsidy_rate if !missing(cust_int), nocons cluster(firm_id)
mat li r(table)
di 
foreach X in cust_int no_cust_int {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_`row' = string(_b[`X'], "%8.0f")
    local ++row
}

* p-value for customer interaction estimate relative to no customer interaction
reg hire_sub cust_int i.subsidy_rate if !missing(cust_int), cluster(firm_id) 
local t = _b[cust_int]/_se[cust_int]
local p = 2*ttail(e(df_r),abs(`t'))
local p_value_3 = regexr(string(`p', "%8.2f"), "^0", "")

* Turn the results matrix into the plotting dataset
clear
svmat R
la var R1 "Willingness to Work with WCs (%)"

* Define group based on order of regressions and predictions above
gen Group = .
replace Group = 1 if R4 == 1 //baseline
replace Group = 1 if R4 == 2 //if hard to fill my job
replace Group = 2 if R4 == 3 | R4 == 4 //inventory value
replace Group = 3 if R4 == 5 | R4 == 6 //customer interaction

* Labels match the three group codes assigned above
label define Group 1 "Baseline / Hard to Fill" 2 "Inventory Value" 3 "Customer Interaction"
label values Group Group

* Add x axis spacing between groups (bars sit at 1, 2, 3.5, 4.5, 6, 7)
generate x_value = R4 if Group == 1
replace x_value = R4 + 0.5 if Group == 2
replace x_value = R4 + 1 if Group == 3
sort x_value 
list x_value Group, sepby(Group)

* Generate graph (graph-wide options placed after the plots, as in Figure 1)
twoway (bar R1 x_value, barw(1) fi(inten10) lc(black) lw(medium) ) ///
(rcap R3 R2 x_value, lc(gs5)), ///
legend(off) xlabel(1 "Baseline" 2 `""If Hard to" "Fill My Jobs""' 3.5 `""High Value" "Inventory""' 4.5 `""Low Value" "Inventory""' 6 `""Customer" "Interaction""' 7 `""No Customer" "Interaction""') ///
ylabel(0(020)100, nogrid angle(0)) yscale(range(1.0))  ///
text(20 1 "`coef_1'%") text(20 2 "`coef_2'%") text(20 3.5 "`coef_3'%")  text(20 4.5 "`coef_4'%") text(20 6 "`coef_5'%") text(20 7 "`coef_6'%") ///
xline(2.75, lpattern(-) lcolor(black)) ///
xline(5.25, lpattern(-) lcolor(black)) ///
text(95 1.5 "{it:p}-value=`p_value_1'") text(95 4 "{it:p}-value=`p_value_2'") text(95 6.5 "{it:p}-value=`p_value_3'") ///
subtitle("", margin(zero) size(medium) pos(11)) ///
graphregion(color(white)) bgcolor(white) ytitle("Willing to Work with WCs (%)") xtitle("") xscale(titlegap(*10)) yscale(titlegap(*5)) 

graph display, ysize(4) xsize(6)
graph export "$Figures/f2b_labormarket_jobchar_full.pdf", replace 
graph export "$Figures/f2b_labormarket_jobchar_full.eps", replace 

/*===========================================================================
Figure 3: Crime and Safety Insurance, Job History Screening, 
          and Limited Criminal Record Screening  
=============================================================================*/

/*------------------  Panel A: No Subsidy Sample ------------------*/
use "$Data/main_survey_long.dta", clear

* Express outcomes in percent (0-100)
foreach var in hire_sub hire_ins hire_hist hire_clean { 
    replace `var' = 100*`var'
} 

* R holds one row per bar: [estimate, CI lower, CI upper, bar index, flag].
* Only the first four rows are filled; unused rows stay missing and are not drawn.
local row=1
mat R=J(10,5,.)

* Baseline: mean of hire_sub in the no-subsidy sample, from the wide file
preserve 
use "$Data/main_survey_wide.dta", clear
replace hire_sub = 100*hire_sub
reg hire_sub if subsidy_rate == 0, cluster(firm_id)
foreach X in _cons {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_1 = string(_b[`X'], "%8.0f")
    local ++row
}
restore 

* Estimates by insurance cap level
reg hire_ins ibn.ins_cap if subsidy_rate == 0, cluster(firm_id) noconstant
foreach X in 5000.ins_cap {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_2 = string(_b[`X'], "%8.0f")
    * p-value relative to the omitted base level of ins_cap
    reg hire_ins i.ins_cap if subsidy_rate == 0, cluster(firm_id)
    local t = _b[`X']/_se[`X']
    local p = 2*ttail(e(df_r),abs(`t'))
    * Drop the leading zero only; a p-value that rounds to 1.00 stays "1.00"
    local p_value_2 = regexr(string(`p', "%8.2f"), "^0", "")
    local ++row
}	

* Estimates by number of completed jobs
reg hire_hist ibn.past_jobs if subsidy_rate == 0, cluster(firm_id) noconstant 
foreach X in 1.past_jobs {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_3 = string(_b[`X'], "%8.0f")
    * p-value relative to the omitted base level of past_jobs
    reg hire_hist i.past_jobs if subsidy_rate == 0, cluster(firm_id) 
    local t = _b[`X']/_se[`X']
    local p = 2*ttail(e(df_r),abs(`t'))
    local p_value_3 = regexr(string(`p', "%8.2f"), "^0", "")
    local ++row
}

* Estimates by number of years elapsed since last arrest or conviction
reg hire_clean ibn.years_elapsed if subsidy_rate == 0, cluster(firm_id) noconstant 
foreach X in 1.years_elapsed {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_4 = string(_b[`X'], "%8.0f")
    * p-value relative to the omitted base level of years_elapsed
    reg hire_clean i.years_elapsed if subsidy_rate == 0, cluster(firm_id) 
    local t = _b[`X']/_se[`X']
    local p = 2*ttail(e(df_r),abs(`t'))
    local p_value_4 = regexr(string(`p', "%8.2f"), "^0", "")
    local ++row
}


* Turn the results matrix into the plotting dataset
clear	
svmat R
la var R1 "Willingness to Work with WCs (%)"

* \$ keeps the dollar sign literal; a bare $5k would be read as a global macro
label define R4 1 "Baseline" 2 "\$5k Insurance" 3 "1 Past Job" 4 "1 Year Clean"
label values R4 R4


* Add x axis spacing between groups (bars sit at 1, 2.5, 4, 5.5)
generate x_value = R4 if R4 == 1
replace x_value = R4 + 0.5 if R4 == 2
replace x_value = R4 + 1 if R4 == 3
replace x_value = R4 + 1.5 if R4 == 4
sort x_value 
list x_value R4, sepby(R4)

* Generate graph (graph-wide options placed after the plots, as in Figure 1)
twoway (bar R1 x_value, barw(1) fi(inten10) lc(black) lw(medium) ) ///
(rcap R3 R2 x_value, lc(gs5)), ///
legend(off) xlabel(1.0 "Baseline" 2.5 `""Crime and Safety" "Insurance""' 4 `""Job History" "Screening""' 5.5 `""Limited Record" "Screening""') ///
ylabel(0(020)100, nogrid angle(0)) yscale(range(1.0))  ///
text(20 1 "`coef_1'%") text(20 2.5 "`coef_2'%") text(20 4 "`coef_3'%")  text(20 5.5 "`coef_4'%") /// 
text(95 2.5 "{it:p}-value=`p_value_2'") text(95 4 "{it:p}-value=`p_value_3'") text(95 5.5 "{it:p}-value=`p_value_4'") ///
subtitle("",  margin(zero) size(medium) pos(11)) ///
graphregion(color(white)) bgcolor(white) ytitle("Willing to Work with WCs (%)") xtitle("") xscale(titlegap(*10)) yscale(titlegap(*5)) 


graph display, ysize(5) xsize(6)	
graph export "$Figures/f3a_insurance_jobhistory_screen_nosub.pdf", replace  
graph export "$Figures/f3a_insurance_jobhistory_screen_nosub.eps", replace 

/*------------------  Panel B: Full Sample ------------------*/
use "$Data/main_survey_long.dta", clear

* Express outcomes in percent (0-100)
foreach var in hire_sub hire_ins hire_hist hire_clean { 
    replace `var'= 100*`var'
} 

* R holds one row per bar: [estimate, CI lower, CI upper, bar index, flag].
* Only the first four rows are filled; unused rows stay missing and are not drawn.
local row=1
mat R=J(10,5,.)

* Baseline: constant from a regression on subsidy-rate dummies, from the wide file
preserve 
use "$Data/main_survey_wide.dta", clear
replace hire_sub = 100*hire_sub
reg hire_sub i.subsidy_rate, cluster(firm_id)
foreach X in _cons {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_1 = string(_b[`X'], "%8.0f")
    local ++row
}
restore 

* Estimates by insurance cap level
reg hire_ins i.subsidy_rate ibn.ins_cap, cluster(firm_id) noconstant
foreach X in 5000.ins_cap {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_2 = string(_b[`X'], "%8.0f")
    * p-value relative to the omitted base level of ins_cap
    reg hire_ins i.subsidy_rate i.ins_cap, cluster(firm_id)
    local t = _b[`X']/_se[`X']
    local p = 2*ttail(e(df_r),abs(`t'))
    * Drop the leading zero only; a p-value that rounds to 1.00 stays "1.00"
    local p_value_2 = regexr(string(`p', "%8.2f"), "^0", "")
    local ++row
}	

* Estimates by number of completed jobs
reg hire_hist i.subsidy_rate ibn.past_jobs, cluster(firm_id) noconstant 
foreach X in 1.past_jobs {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_3 = string(_b[`X'], "%8.0f")
    * p-value relative to the omitted base level of past_jobs
    reg hire_hist i.subsidy_rate i.past_jobs, cluster(firm_id) 
    local t = _b[`X']/_se[`X']
    local p = 2*ttail(e(df_r),abs(`t'))
    local p_value_3 = regexr(string(`p', "%8.2f"), "^0", "")
    local ++row
}

* Estimates by number of years elapsed since last arrest or conviction
reg hire_clean i.subsidy_rate ibn.years_elapsed, cluster(firm_id) noconstant 
foreach X in 1.years_elapsed {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=1
    local coef_4 = string(_b[`X'], "%8.0f")
    * p-value relative to the omitted base level of years_elapsed
    reg hire_clean i.subsidy_rate i.years_elapsed, cluster(firm_id) 
    local t = _b[`X']/_se[`X']
    local p = 2*ttail(e(df_r),abs(`t'))
    local p_value_4 = regexr(string(`p', "%8.2f"), "^0", "")
    local ++row
}


* Turn the results matrix into the plotting dataset
clear	
svmat R
la var R1 "Willingness to Work with WCs (%)"

* \$ keeps the dollar sign literal; a bare $5k would be read as a global macro
label define R4 1 "Baseline" 2 "\$5k Insurance" 3 "1 Past Job" 4 "1 Year Clean"
label values R4 R4


* Add x axis spacing between groups (bars sit at 1, 2.5, 4, 5.5)
generate x_value = R4 if R4 == 1
replace x_value = R4 + 0.5 if R4 == 2
replace x_value = R4 + 1 if R4 == 3
replace x_value = R4 + 1.5 if R4 == 4
sort x_value 
list x_value R4, sepby(R4)

* Generate graph (graph-wide options placed after the plots, as in Figure 1)
twoway (bar R1 x_value, barw(1) fi(inten10) lc(black) lw(medium) ) ///
(rcap R3 R2 x_value, lc(gs5)), ///
legend(off) xlabel(1.0 "Baseline" 2.5 `""Crime and Safety" "Insurance""' 4 `""Job History" "Screening""' 5.5 `""Limited Record" "Screening""') ///
ylabel(0(020)100, nogrid angle(0)) yscale(range(1.0))  ///
text(20 1 "`coef_1'%") text(20 2.5 "`coef_2'%") text(20 4 "`coef_3'%")  text(20 5.5 "`coef_4'%") /// 
text(95 2.5 "{it:p}-value=`p_value_2'") text(95 4 "{it:p}-value=`p_value_3'") text(95 5.5 "{it:p}-value=`p_value_4'") ///
subtitle("",  margin(zero) size(medium) pos(11)) ///
graphregion(color(white)) bgcolor(white) ytitle("Willing to Work with WCs (%)") xtitle("") xscale(titlegap(*10)) yscale(titlegap(*5)) 


graph display, ysize(5) xsize(6)	
graph export "$Figures/f3b_insurance_jobhistory_screen_full.pdf", replace
graph export "$Figures/f3b_insurance_jobhistory_screen_full.eps", replace   

/*===========================================================================
Figure 4: Criminal Record Screening by Conviction Type
=============================================================================*/

/*------------------  Panel A: No Subsidy Sample ------------------*/
use "$Data/main_survey_longcrime.dta", clear

* Express the outcome in percent (0-100)
replace hire = 100*hire

* Indicator for rows where none of the six conviction-type flags equals 1
gen no_conviction_restriction = violent_fel != 1 & prop_fin_fel != 1 & substance_fel != 1 & violent_mis != 1 & prop_fin_mis != 1 & substance_mis != 1 

* R holds one row per bar: [estimate, CI lower, CI upper, bar index, flag].
* Only the first six rows are filled; unused rows stay missing and are not drawn.
local row=1
mat R=J(12,5,.)

* Estimates by conviction type (no-subsidy sample)
reg hire violent_fel prop_fin_fel substance_fel ///
violent_mis prop_fin_mis substance_mis no_conviction_restriction if subsidy_rate == 0, cluster(firm_id) noconstant
* Loop order sets the bar order: felony then misdemeanor within each crime type
foreach X in violent_fel violent_mis prop_fin_fel prop_fin_mis substance_fel substance_mis {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=0
    local coef_`row' = string(_b[`X'], "%8.0f")
    local ++row
}	

* p-values by conviction type relative to violent felony
* (violent_fel is left out of the regressors; same loop order as above, from bar 2)
local row=2
reg hire prop_fin_fel substance_fel ///
violent_mis prop_fin_mis substance_mis if subsidy_rate == 0 & question != "sub", cluster(firm_id) 
foreach X in violent_mis prop_fin_fel prop_fin_mis substance_fel substance_mis {
    local t = _b[`X']/_se[`X']
    local p = 2*ttail(e(df_r),abs(`t'))
    * Drop the leading zero only; a p-value that rounds to 1.00 stays "1.00"
    local p_value_`row' = regexr(string(`p', "%8.2f"), "^0", "")
    local ++row
}

* Turn the results matrix into the plotting dataset
clear
svmat R
la var R1 "Willingness to Work with WCs (%)"

label define R4 1 "Violent Felony" 3 "Property/Financial Felony" 5 "Drug-Related Felony" 2 "Violent Misdemeanor" 4 "Property/Financial Misdemeanor" 6 "Drug-Related Misdemeanor"
label values R4 R4

* Add x axis spacing between groups (bars sit at 1, 2.5, 4, 5.5, 7, 8.5)
generate x_value = R4 if R4 == 1
replace x_value = R4 + 0.5 if R4 == 2
replace x_value = R4 + 1 if R4 == 3
replace x_value = R4 + 1.5 if R4 == 4
replace x_value = R4 + 2 if R4 == 5	
replace x_value = R4 + 2.5 if R4 == 6

sort x_value 
list x_value R4, sepby(R4)

* Generate graph (graph-wide options placed after the plots, as in Figure 1)
twoway (bar R1 x_value, barw(1) fi(inten10) lc(black) lw(medium) ) ///
(rcap R3 R2 x_value, lc(gs5)), ///
legend(off)  xlabel(1 `""Violent" "Felony""' 2.5 `""Violent" "Misdemeanor""'  4 `""Property/Financial" "Felony""'   5.5 `""Property/Financial" "Misdemeanor""'  7  `""Drug-Related" "Felony""'  8.5 `""Drug-Related" "Misdemeanor""' ) ///
ylabel(0(020)100, nogrid angle(0)) yscale(range(1.0))  ///
text(14 1 "`coef_1'%") text(18 2.5 "`coef_2'%") text(24 4 "`coef_3'%")  text(18 5.5 "`coef_4'%") ///
text(18 7 "`coef_5'%") text(18 8.5 "`coef_6'%")  ///
text(95 2.5 "{it:p}-value=`p_value_2'") text(95 4 "{it:p}-value=`p_value_3'")  text(95 5.5 "{it:p}-value=`p_value_4'") ///
text(95 7 "{it:p}-value=`p_value_5'") text(95 8.5 "{it:p}-value=`p_value_6'")  ///
subtitle("", margin(zero) size(medium) pos(11)) ///
graphregion(color(white)) bgcolor(white) ytitle("Willing to Work with WCs (%)") xtitle("") xscale(titlegap(*10)) yscale(titlegap(*5)) 

graph display, ysize(5) xsize(9)	
graph export "$Figures/f4a_crime_type_nosub.pdf", replace  
graph export "$Figures/f4a_crime_type_nosub.eps", replace 

/*------------------  Panel B: Full Sample ------------------*/
use "$Data/main_survey_longcrime.dta", clear

* Express the outcome in percent (0-100)
replace hire = 100*hire

* Indicator for rows where none of the six conviction-type flags equals 1
gen no_conviction_restriction = violent_fel != 1 & prop_fin_fel != 1 & substance_fel != 1 & violent_mis != 1 & prop_fin_mis != 1 & substance_mis != 1 

* R holds one row per bar: [estimate, CI lower, CI upper, bar index, flag].
* Only the first six rows are filled; unused rows stay missing and are not drawn.
local row=1
mat R=J(12,5,.)

* Estimates by conviction type, controlling for subsidy rate (full sample)
reg hire violent_fel prop_fin_fel substance_fel ///
violent_mis prop_fin_mis substance_mis no_conviction_restriction i.subsidy_rate, cluster(firm_id) noconstant
* Loop order sets the bar order: felony then misdemeanor within each crime type
foreach X in violent_fel violent_mis prop_fin_fel prop_fin_mis substance_fel substance_mis {
    mat R[`row',1]=_b[`X']
    mat R[`row',2]=_b[`X']-1.96*_se[`X']
    mat R[`row',3]=_b[`X']+1.96*_se[`X']
    mat R[`row',4]=`row'
    mat R[`row',5]=0
    local coef_`row' = string(_b[`X'], "%8.0f")
    local ++row
}	

* p-values by conviction type
* (violent_fel is left out of the regressors; same loop order as above, from bar 2)
local row=2
reg hire prop_fin_fel substance_fel ///
violent_mis prop_fin_mis substance_mis i.subsidy_rate if question != "sub", cluster(firm_id) 
foreach X in violent_mis prop_fin_fel prop_fin_mis substance_fel substance_mis {
    local t = _b[`X']/_se[`X']
    local p = 2*ttail(e(df_r),abs(`t'))
    * Drop the leading zero only; a p-value that rounds to 1.00 stays "1.00"
    local p_value_`row' = regexr(string(`p', "%8.2f"), "^0", "")
    local ++row
}

* Turn the results matrix into the plotting dataset
clear
svmat R
la var R1 "Willingness to Work with WCs (%)"

label define R4 1 "Violent Felony" 3 "Property/Financial Felony" 5 "Drug-Related Felony" 2 "Violent Misdemeanor" 4 "Property/Financial Misdemeanor" 6 "Drug-Related Misdemeanor"
label values R4 R4

* Add x axis spacing between groups (bars sit at 1, 2.5, 4, 5.5, 7, 8.5)
generate x_value = R4 if R4 == 1
replace x_value = R4 + 0.5 if R4 == 2
replace x_value = R4 + 1 if R4 == 3
replace x_value = R4 + 1.5 if R4 == 4
replace x_value = R4 + 2 if R4 == 5	
replace x_value = R4 + 2.5 if R4 == 6

sort x_value 
list x_value R4, sepby(R4)

* Generate graph (graph-wide options placed after the plots, as in Figure 1)
twoway (bar R1 x_value, barw(1) fi(inten10) lc(black) lw(medium) ) ///
(rcap R3 R2 x_value, lc(gs5)), ///
legend(off)  xlabel(1 `""Violent" "Felony""' 2.5 `""Violent" "Misdemeanor""'  4 `""Property/Financial" "Felony""'   5.5 `""Property/Financial" "Misdemeanor""'  7  `""Drug-Related" "Felony""'  8.5 `""Drug-Related" "Misdemeanor""' ) ///
ylabel(0(020)100, nogrid angle(0)) yscale(range(1.0))  ///
text(14 1 "`coef_1'%") text(18 2.5 "`coef_2'%") text(20 4 "`coef_3'%")  text(18 5.5 "`coef_4'%") ///
text(18 7 "`coef_5'%") text(18 8.5 "`coef_6'%")  ///
text(95 2.5 "{it:p}-value=`p_value_2'") text(95 4 "{it:p}-value=`p_value_3'")  text(95 5.5 "{it:p}-value=`p_value_4'") ///
text(95 7 "{it:p}-value=`p_value_5'") text(95 8.5 "{it:p}-value=`p_value_6'")  ///
subtitle("",  margin(zero) size(medium) pos(11)) ///
graphregion(color(white)) bgcolor(white) ytitle("Willing to Work with WCs (%)") xtitle("") xscale(titlegap(*10)) yscale(titlegap(*5)) 

graph display, ysize(5) xsize(9)	
graph export "$Figures/f4b_crime_type_full.pdf", replace  
graph export "$Figures/f4b_crime_type_full.eps", replace  

/*===========================================================================
Figure 5: Beliefs about WC Productivity 
=============================================================================*/

* Load the wide survey data and set the graph dimensions reused by the Figure 5 panels
use "$Data/main_survey_wide.dta", clear
local paper_aspect xsize(12) ysize(8) 

* Variable for share who believe percent low performance is above actual percent low performance 
* Missing priors are excluded explicitly: Stata treats missing as larger than
* any number, so (performance_percent_pre > 3) would otherwise count them as "above".
gen above_lo = 100*(performance_percent_pre > 3) if info_type == 2 & !missing(performance_percent_pre)
su above_lo
local share_above = string(`r(mean)', "%2.0f")

* Variable for share who believe percent high performance is below actual percent high performance
* Missing priors are excluded here as well: (missing < 87) evaluates to 0, which
* would otherwise keep them in the denominator of the share.
gen below_hi = 100*(performance_percent_pre < 87) if info_type == 1 & !missing(performance_percent_pre)
su below_hi
local share_below = string(`r(mean)', "%2.0f")

/*----------  Panel A: High Performance Posterior Beliefs  ----------*/
* info_type == 1 is the high performance information treatment
local i = 1

* Estimate the pre-to-post change in beliefs for this information arm
use "$Data/main_survey_wide.dta", clear
keep if info_type == `i'
keep performance_percent_post performance_percent_pre info_randomization firm_id

* Reshape to one row per respondent and wave; wave holds "_pre" or "_post"
generate id = _n
reshape long performance_percent, i(id) j(wave) string
generate post = (wave == "_post")

* Keep only observations where respondent was shown the information
keep if info_randomization == 1

* The coefficient on 1.post is the mean posterior minus the mean prior;
* standard errors are clustered by firm.
regress performance_percent ib0.post, vce(cluster firm_id)

* store_estimates is defined outside this section; it is expected to return
* the formatted coefficient and standard error as strings in r(beta) and r(se).
store_estimates 1.post
local info_ATE = r(beta) + " " + r(se)

* Plot: rebuild the sample of respondents in this arm who were shown the information
use "$Data/main_survey_wide.dta", clear
keep if info_type == `i' & info_randomization == 1

keep performance_percent_post performance_percent_pre firm_id
generate id = _n
reshape long performance_percent, i(id) j(wave) string
generate post = (wave == "_post")

* Arm-specific styling: line colour, the vertical reference line, and the
* positions of the "Posterior" and "Prior" text labels. The prior is drawn in
* the same colour at reduced intensity (the graying factor).
local graying .6
if `i' == 1 {
    local highlight "maroon"
    local truth_line xline(87, lwidth(medthick) lpattern(dash_dot)  lcolor(maroon))
    local labels ///
        (scatteri 65 85 "Posterior. {&Delta} = `info_ATE'  ", mlabpos(10)  msymbol(i) mlabsize(medium)  mlabcolor(`highlight')) ///
        (scatteri 10 80 "Prior", mlabpos(10)  msymbol(i) mlabsize(medium) mlabcolor(`highlight'*`graying'))
}
else if `i' == 2 {
    local highlight "navy"
    local truth_line xline(3, lwidth(medthick) lpattern(dash_dot) lcolor(navy))
    local labels ///
        (scatteri 65 6 "Posterior. {&Delta} = `info_ATE'", mlabpos(2)  msymbol(i) mlabsize(medium)  mlabcolor(`highlight')) ///
        (scatteri 12 10 "Prior", mlabpos(2)  msymbol(i) mlabsize(medium) mlabcolor(`highlight'*`graying'))
}

* Count responses in 5-point bins, each coded by its left edge.
* NOTE(review): egen cut() sets the bin to missing for values at or above the
* last cutpoint (100), so responses of exactly 100 are dropped by the next
* two commands. Confirm this is intended.
egen bin = cut(performance_percent), at(0(5)100)
gcollapse (count) performance_percent, by(wave bin)
drop if missing(bin)

* Share of each wave's binned responses falling in each bin, in percent
egen total = total(performance_percent), by(wave)
generate pdf = performance_percent/total * 100

* Duplicate each bin at its right edge so a line plot traces a step histogram.
* Where two points share an x value, the right-edge copy of the previous bin
* is sorted first.
expand 2 if bin < 100, gen(right)
replace bin = bin + 5 if right == 1
generate order = -right
sort bin order

* Generate graph: colour version, saved as PDF.
* Posterior in the highlight colour, prior in the grayed version of it, with
* the text labels and reference line chosen for this arm.
twoway ///
    (line pdf bin if wave == "_post", lwidth(medthick) lcolor(`highlight')) ///
    (line pdf bin if wave == "_pre", lwidth(medthick) lcolor(`highlight'*`graying')) ///
    `labels' ///
    , `paper_aspect' `truth_line' ///
    xlabel(0(20)100, labsize(large)) ///
    ylabel(0(25)75, labsize(large) angle(0)) ///
    legend(off) ///
    xtitle("High-Performance Beliefs", size(medium)) ///
    ytitle("") ///
    graphregion(color(white)) bgcolor(white) ///
    ytitle("Share of Respondents (%)", size(medium) margin(zero) box bcolor(white))

graph export "$Figures/f5a_posteriorbeliefshighperf.pdf", replace

* Generate graph in monochrome: same plot with every colour replaced by gs2,
* saved as EPS.
local highlight "gs2"
twoway ///
    (line pdf bin if wave == "_post", lwidth(medthick) lcolor(`highlight')) ///
    (line pdf bin if wave == "_pre", lwidth(medthick) lcolor(`highlight'*`graying')) ///
    (scatteri 65 85 "Posterior. {&Delta} = `info_ATE'  ", mlabpos(10)  msymbol(i) mlabsize(medium)  mlabcolor(`highlight')) ///
    (scatteri 10 80 "Prior", mlabpos(10)  msymbol(i) mlabsize(medium) mlabcolor(`highlight'*`graying')) ///
    , `paper_aspect' ///
    xline(87, lwidth(medthick) lpattern(dash_dot)  lcolor(`highlight')) ///
    xlabel(0(20)100, labsize(large)) ///
    ylabel(0(25)75, labsize(large) angle(0)) ///
    legend(off) ///
    xtitle("High-Performance Beliefs", size(medium)) ///
    ytitle("") ///
    graphregion(color(white)) bgcolor(white) ///
    ytitle("Share of Respondents (%)", size(medium) margin(zero) box bcolor(white))

graph export "$Figures/f5a_posteriorbeliefshighperf.eps", replace

/*----------  Panel B: Low Performance Posterior Beliefs  ----------*/
* info_type == 2 is the low performance information treatment
local i = 2

* Estimate the pre-to-post change in beliefs for this information arm
use "$Data/main_survey_wide.dta", clear
keep if info_type == `i'
keep performance_percent_post performance_percent_pre info_randomization firm_id

* Reshape to one row per respondent and wave; wave holds "_pre" or "_post"
generate id = _n
reshape long performance_percent, i(id) j(wave) string
generate post = (wave == "_post")

* Keep only observations where respondent was shown the information
keep if info_randomization == 1

* Estimate effect of info treatment: the coefficient on 1.post is the mean
* posterior minus the mean prior; standard errors are clustered by firm.
regress performance_percent ib0.post, vce(cluster firm_id)

* store_estimates is defined outside this section; it is expected to return
* the formatted coefficient and standard error as strings in r(beta) and r(se).
store_estimates 1.post
local info_ATE = r(beta) + " " + r(se)

* Generate plot for low performance treatment prior and posterior beliefs:
* rebuild the sample of respondents in this arm who were shown the information
use "$Data/main_survey_wide.dta", clear
keep if info_type == `i' & info_randomization == 1

keep performance_percent_post performance_percent_pre firm_id
generate id = _n
reshape long performance_percent, i(id) j(wave) string
generate post = (wave == "_post")

* Arm-specific styling: line colour, the vertical reference line, and the
* positions of the "Posterior" and "Prior" text labels. The prior is drawn in
* the same colour at reduced intensity (the graying factor).
local graying .6
if `i' == 1 {
    local highlight "maroon"
    local truth_line xline(87, lwidth(medthick) lpattern(dash_dot)  lcolor(maroon))
    local labels ///
        (scatteri 65 85 "Posterior. {&Delta} = `info_ATE'  ", mlabpos(10)  msymbol(i) mlabsize(medium)  mlabcolor(`highlight')) ///
        (scatteri 10 80 "Prior", mlabpos(10)  msymbol(i) mlabsize(medium) mlabcolor(`highlight'*`graying'))
}
else if `i' == 2 {
    local highlight "navy"
    local truth_line xline(3, lwidth(medthick) lpattern(dash_dot) lcolor(navy))
    local labels ///
        (scatteri 65 6 "Posterior. {&Delta} = `info_ATE'", mlabpos(2)  msymbol(i) mlabsize(medium)  mlabcolor(`highlight')) ///
        (scatteri 12 10 "Prior", mlabpos(2)  msymbol(i) mlabsize(medium) mlabcolor(`highlight'*`graying'))
}

* Count responses in 5-point bins, each coded by its left edge.
* NOTE(review): egen cut() sets the bin to missing for values at or above the
* last cutpoint (100), so responses of exactly 100 are dropped by the next
* two commands. Confirm this is intended.
egen bin = cut(performance_percent), at(0(5)100)
gcollapse (count) performance_percent, by(wave bin)
drop if missing(bin)

* Share of each wave's binned responses falling in each bin, in percent
egen total = total(performance_percent), by(wave)
generate pdf = performance_percent/total * 100

* Duplicate each bin at its right edge so a line plot traces a step histogram.
* Where two points share an x value, the right-edge copy of the previous bin
* is sorted first.
expand 2 if bin < 100, gen(right)
replace bin = bin + 5 if right == 1
generate order = -right
sort bin order

* Generate graph: colour version, saved as PDF.
* Posterior in the highlight colour, prior in the grayed version of it, with
* the text labels and reference line chosen for this arm.
twoway ///
    (line pdf bin if wave == "_post", lwidth(medthick) lcolor(`highlight')) ///
    (line pdf bin if wave == "_pre", lwidth(medthick) lcolor(`highlight'*`graying')) ///
    `labels' ///
    , `paper_aspect' `truth_line' ///
    xlabel(0(20)100, labsize(large)) ///
    ylabel(0(25)75, labsize(large) angle(0)) ///
    legend(off) ///
    xtitle("Low-Performance Beliefs", size(medium)) ///
    ytitle("") ///
    graphregion(color(white)) bgcolor(white) ///
    ytitle("Share of Respondents (%)", size(medium) margin(zero) box bcolor(white))

graph export "$Figures/f5b_posteriorbeliefslowperf.pdf", replace

* Generate graph in black and white: same plot with every colour replaced by
* gs2, saved as EPS.
local highlight "gs2"
twoway ///
    (line pdf bin if wave == "_post", lwidth(medthick) lcolor(`highlight')) ///
    (line pdf bin if wave == "_pre", lwidth(medthick) lcolor(`highlight'*`graying')) ///
    (scatteri 65 6 "Posterior. {&Delta} = `info_ATE'", mlabpos(2)  msymbol(i) mlabsize(medium)  mlabcolor(`highlight')) ///
    (scatteri 12 10 "Prior", mlabpos(2)  msymbol(i) mlabsize(medium) mlabcolor(`highlight'*`graying')) ///
    , `paper_aspect' ///
    xline(3, lwidth(medthick) lpattern(dash_dot) lcolor(`highlight')) ///
    xlabel(0(20)100, labsize(large)) ///
    ylabel(0(25)75, labsize(large) angle(0)) ///
    legend(off) ///
    xtitle("Low-Performance Beliefs", size(medium)) ///
    ytitle("") ///
    graphregion(color(white)) bgcolor(white) ///
    ytitle("Share of Respondents (%)", size(medium) margin(zero) box bcolor(white))

graph export "$Figures/f5b_posteriorbeliefslowperf.eps", replace

* Close the open log file and end the do-file
log close
exit
